From f715a4ce18d77ae452972bf69d48f8f09d225d9e Mon Sep 17 00:00:00 2001 From: "Ian.Campbell@xensource.com" Date: Mon, 22 May 2006 09:23:15 +0100 Subject: [PATCH] Convert x86/64 Linux to use the new memory map hypercall. This change removes several of the differences between the bare-metal and Xen versions. Signed-off-by: Ian Campbell --- .../arch/x86_64/kernel/e820-xen.c | 196 ++++++++---------- .../arch/x86_64/kernel/setup-xen.c | 64 +++--- .../arch/x86_64/mm/init-xen.c | 26 ++- .../include/asm-x86_64/e820.h | 4 +- 4 files changed, 140 insertions(+), 150 deletions(-) diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c index 008eb92488..0f5dd54fa3 100644 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c @@ -26,8 +26,6 @@ #include #include -unsigned long pci_mem_start = 0xaeedbabe; - /* * PFN of last memory page. */ @@ -47,15 +45,15 @@ unsigned long end_pfn_map; unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT; #ifndef CONFIG_XEN - - extern struct resource code_resource, data_resource; +#endif /* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) { unsigned long addr = *addrp, last = addr + size; +#ifndef CONFIG_XEN /* various gunk below that needed for SMP startup */ if (addr < 0x8000) { *addrp = 0x8000; @@ -83,9 +81,16 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) return 1; } /* XXX ramdisk image here? */ +#else + if (last < (table_end<name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; default: res->name = "reserved"; } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; + res->start = e820[i].addr; + res->end = res->start + e820[i].size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); +#ifndef CONFIG_XEN if (e820.map[i].type == E820_RAM) { /* * We don't know which RAM region contains kernel data, @@ -257,74 +264,9 @@ void __init e820_reserve_resources(void) request_resource(res, &crashk_res); #endif } +#endif } } -#else -void __init e820_reserve_resources(void) -{ - dom0_op_t op; - struct dom0_memory_map_entry *map; - unsigned long gapstart, gapsize, round, last; - int i, found = 0; - - if (!(xen_start_info->flags & SIF_INITDOMAIN)) - return; - - map = alloc_bootmem_low_pages(PAGE_SIZE); - op.cmd = DOM0_PHYSICAL_MEMORY_MAP; - set_xen_guest_handle(op.u.physical_memory_map.memory_map, map); - op.u.physical_memory_map.max_map_entries = - PAGE_SIZE / sizeof(struct dom0_memory_map_entry); - BUG_ON(HYPERVISOR_dom0_op(&op)); - - last = 0x100000000ULL; - gapstart = 0x10000000; - gapsize = 0x400000; - - for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) { - struct resource *res; - - if ((last > map[i].end) && ((last - map[i].end) > gapsize)) { - gapsize = last - map[i].end; - gapstart = map[i].end; - found = 1; - } - if (map[i].start < last) - last = map[i].start; - - if (map[i].end > 0x100000000ULL) - continue; - res = alloc_bootmem_low(sizeof(struct resource)); - res->name = map[i].is_ram ? "System RAM" : "reserved"; - res->start = map[i].start; - res->end = map[i].end - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - request_resource(&iomem_resource, res); - } - - free_bootmem(__pa(map), PAGE_SIZE); - - if (!found) { - gapstart = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL); - gapstart = (gapstart << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); -} -#endif /* CONFIG_XEN */ /* * Add a memory region to the kernel e820 map. @@ -370,7 +312,6 @@ void __init e820_print_map(char *who) } } -#ifndef CONFIG_XEN /* * Sanitize the BIOS e820 map. * @@ -557,9 +498,13 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) */ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) { +#ifndef CONFIG_XEN /* Only one memory region (or negative)? Ignore it */ if (nr_map < 2) return -1; +#else + BUG_ON(nr_map < 1); +#endif do { unsigned long start = biosmap->addr; @@ -571,6 +516,7 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) if (start > end) return -1; +#ifndef CONFIG_XEN /* * Some BIOSes claim RAM in the 640k - 1M region. * Not right. Fix it up. @@ -589,12 +535,14 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) size = end - start; } } +#endif add_memory_region(start, size, type); } while (biosmap++,--nr_map); return 0; } +#ifndef CONFIG_XEN void __init setup_memory_region(void) { char *who = "BIOS-e820"; @@ -628,39 +576,63 @@ void __init setup_memory_region(void) #else /* CONFIG_XEN */ -extern unsigned long xen_override_max_pfn; -extern union xen_start_info_union xen_start_info_union; - -unsigned long __init e820_end_of_ram(void) +void __init setup_memory_region(void) { - unsigned long max_end_pfn; - - if (xen_override_max_pfn == 0) { - max_end_pfn = xen_start_info->nr_pages; - /* Default 8MB slack (to balance backend allocations). */ - max_end_pfn += 8 << (20 - PAGE_SHIFT); - } else if (xen_override_max_pfn > xen_start_info->nr_pages) { - max_end_pfn = xen_override_max_pfn; - } else { - max_end_pfn = xen_start_info->nr_pages; + int rc; + struct xen_memory_map memmap; + /* + * This is rather large for a stack variable but this early in + * the boot process we know we have plenty slack space. + */ + struct e820entry map[E820MAX]; + + memmap.nr_entries = E820MAX; + set_xen_guest_handle(memmap.buffer, map); + + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if ( rc == -ENOSYS ) { + memmap.nr_entries = 1; + map[0].addr = 0ULL; + map[0].size = xen_start_info->nr_pages << PAGE_SHIFT; + /* 8MB slack (to balance backend allocations). */ + map[0].size += 8 << 20; + map[0].type = E820_RAM; + rc = 0; } + BUG_ON(rc); - return max_end_pfn; -} + sanitize_e820_map(map, (char *)&memmap.nr_entries); -unsigned long __init -e820_hole_size(unsigned long start_pfn, unsigned long end_pfn) -{ - return 0; -} + BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0); + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); + e820_print_map("Xen"); +} #endif void __init parse_memopt(char *p, char **from) { + int i; + unsigned long current_end; + unsigned long end; + end_user_pfn = memparse(p, from); end_user_pfn >>= PAGE_SHIFT; - xen_override_max_pfn = (unsigned long) end_user_pfn; + + end = end_user_pfn<= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; + unsigned long long start = e820[i].addr; + unsigned long long end = start + e820[i].size; /* * Since "last" is at most 4GB, we know we'll @@ -730,16 +703,15 @@ __init void e820_setup_gap(void) } /* - * Start allocating dynamic PCI memory a bit into the gap, - * aligned up to the nearest megabyte. - * - * Question: should we try to pad it up a bit (do something - * like " + (gapsize >> 3)" in there too?). We now have the - * technology. + * See how much we want to round up: start off with + * rounding to the next 1MB area. */ - pci_mem_start = (gapstart + 0xfffff) & ~0xfffff; + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", pci_mem_start, gapstart, gapsize); -#endif } diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c index b6ed190a2d..13ad2b02bc 100644 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c @@ -76,8 +76,8 @@ #include #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) #define PFN_PHYS(x) ((x) << PAGE_SHIFT) -#define end_pfn_map end_pfn #include +#include extern unsigned long start_pfn; extern struct edid_info edid_info; @@ -490,19 +490,6 @@ static __init void parse_cmdline_early (char ** cmdline_p) } #ifndef CONFIG_NUMA -#ifdef CONFIG_XEN -static void __init -contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) -{ - unsigned long bootmap_size; - - bootmap_size = init_bootmem(start_pfn, end_pfn); - free_bootmem(0, xen_start_info->nr_pages << PAGE_SHIFT); - reserve_bootmem(HIGH_MEMORY, - (PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1) - - HIGH_MEMORY); -} -#else static void __init contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) { @@ -513,10 +500,13 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) if (bootmap == -1L) panic("Cannot find bootmem map of size %ld\n",bootmap_size); bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); +#ifdef CONFIG_XEN + e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<flags & SIF_INITDOMAIN) { + machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE); + + memmap.nr_entries = E820MAX; + set_xen_guest_handle(memmap.buffer, machine_e820); + + BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)); + + e820_reserve_resources(machine_e820, memmap.nr_entries); + } +#elif !defined(CONFIG_XEN) + e820_reserve_resources(e820.map, e820.nr_map); #endif request_resource(&iomem_resource, &video_ram_resource); @@ -951,7 +956,14 @@ void __init setup_arch(char **cmdline_p) request_resource(&ioport_resource, &standard_io_resources[i]); } - e820_setup_gap(); +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) + if (xen_start_info->flags & SIF_INITDOMAIN) { + e820_setup_gap(machine_e820, memmap.nr_entries); + free_bootmem(__pa(machine_e820), PAGE_SIZE); + } +#elif !defined(CONFIG_XEN) + e820_setup_gap(e820.map, e820.nr_map); +#endif #ifdef CONFIG_GART_IOMMU iommu_hole_init(); diff --git a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c index ab49597889..1de014d05b 100644 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c @@ -370,7 +370,7 @@ void __set_fixmap_user (enum fixed_addresses idx, unsigned long phys, pgprot_t p set_pte_phys(address, phys, prot, SET_FIXMAP_USER); } -unsigned long __initdata table_start, tables_space; +unsigned long __initdata table_start, table_end; unsigned long get_machine_pfn(unsigned long addr) { @@ -409,11 +409,17 @@ static inline int make_readonly(unsigned long paddr) { int readonly = 0; - /* Make old and new page tables read-only. */ + /* Make new page tables read-only. */ + if (!xen_feature(XENFEAT_writable_page_tables) + && (paddr >= (table_start << PAGE_SHIFT)) + && (paddr < (table_end << PAGE_SHIFT))) + readonly = 1; + /* Make old page tables read-only. */ if (!xen_feature(XENFEAT_writable_page_tables) && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map)) - && (paddr < ((table_start << PAGE_SHIFT) + tables_space))) + && (paddr < (start_pfn << PAGE_SHIFT))) readonly = 1; + /* * No need for writable mapping of kernel image. This also ensures that * page and descriptor tables embedded inside don't have writable @@ -544,7 +550,7 @@ void __init xen_init_pt(void) mk_kernel_pgd(__pa_symbol(level3_user_pgt))); } -void __init extend_init_mapping(void) +void __init extend_init_mapping(unsigned long tables_space) { unsigned long va = __START_KERNEL_map; unsigned long phys, addr, *pte_page; @@ -599,23 +605,23 @@ void __init extend_init_mapping(void) static void __init find_early_table_space(unsigned long end) { - unsigned long puds, pmds, ptes; + unsigned long puds, pmds, ptes, tables; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT; - tables_space = - round_up(puds * 8, PAGE_SIZE) + + tables = round_up(puds * 8, PAGE_SIZE) + round_up(pmds * 8, PAGE_SIZE) + round_up(ptes * 8, PAGE_SIZE); - extend_init_mapping(); + extend_init_mapping(tables); table_start = start_pfn; + table_end = table_start + (tables>>PAGE_SHIFT); early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, table_start << PAGE_SHIFT, start_pfn << PAGE_SHIFT); + end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT); } /* Setup the direct mapping of the physical memory at PAGE_OFFSET. @@ -660,7 +666,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); } - BUG_ON(!after_bootmem && start_pfn != table_start + (tables_space >> PAGE_SHIFT)); + BUG_ON(!after_bootmem && start_pfn != table_end); __flush_tlb_all(); } diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/e820.h b/linux-2.6-xen-sparse/include/asm-x86_64/e820.h index 8dcc326652..d99a8e5465 100644 --- a/linux-2.6-xen-sparse/include/asm-x86_64/e820.h +++ b/linux-2.6-xen-sparse/include/asm-x86_64/e820.h @@ -45,12 +45,12 @@ extern void add_memory_region(unsigned long start, unsigned long size, extern void setup_memory_region(void); extern void contig_e820_setup(void); extern unsigned long e820_end_of_ram(void); -extern void e820_reserve_resources(void); +extern void e820_reserve_resources(struct e820entry *e820, int nr_map); extern void e820_print_map(char *who); extern int e820_mapped(unsigned long start, unsigned long end, unsigned type); extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end); -extern void e820_setup_gap(void); +extern void e820_setup_gap(struct e820entry *e820, int nr_map); extern unsigned long e820_hole_size(unsigned long start_pfn, unsigned long end_pfn); -- 2.30.2